Rational Epicardial Reprogramming with 10 Transcription Factors by Reprogram-Seq

Jialei Duan

2022-02-01


Duan, J., Li, B., Bhakta, M., Xie, S., Zhou, P., Munshi, N.V., and Hon, G.C. (2019). Rational Reprogramming of Cellular States by Combinatorial Perturbation. Cell Rep. 27, 3486–3499.e6.


Rendered at 2022-02-01 14:01:07 America/Chicago.


Load required packages.

[1] "2022-02-01 14:01:08 CST"
[1] "America/Chicago"

Data preparation

Functions loading

source(file = file.path(SCRIPT_DIR, "utilities.R"))

Data loading

PROJECT_DIR <- file.path(
    "/Users/jialei/Maestral/Data/Projects/UTSW/Cellular_reprogramming",
    "Cardiac_reprogramming/Data_submission/Github"
)
gene_symbols <- vroom::vroom(
    file = file.path(
        PROJECT_DIR, "data", "misc", "genes.tsv"
    ),
    col_names = FALSE
)

gene_symbols <- setNames(object = gene_symbols$X2, nm = gene_symbols$X1)
gene_symbols |> head()
ENSMUSG00000051951 ENSMUSG00000089699 ENSMUSG00000102343 ENSMUSG00000025900 
            "Xkr4"           "Gm1992"          "Gm37381"              "Rp1" 
ENSMUSG00000109048 ENSMUSG00000025902 
             "Rp1"            "Sox17" 
length(gene_symbols)
[1] 27999

Matrix

matrix_readcount_use <- Matrix::sparseMatrix(
    i = readRDS(
        file.path(
            PROJECT_DIR, "data/10x", "expr_readcount_raw_csc_indices.rds"
        )
    ),
    p = readRDS(
        file.path(
            PROJECT_DIR, "data/10x", "expr_readcount_raw_csc_indptr.rds"
        )
    ),
    x = readRDS(
        file.path(
            PROJECT_DIR, "data/10x", "expr_readcount_raw_csc_values.rds"
        )
    ),
    dims = readRDS(
        file.path(
            PROJECT_DIR, "data/10x", "expr_readcount_raw_csc_shape.rds"
        )
    ),
    dimnames = readRDS(
        file.path(
            PROJECT_DIR, "data/10x", "expr_readcount_raw_csc_dimnames.rds"
        )
    ),
    index1 = FALSE
)
dim(matrix_readcount_use)
[1] 27999 34564
rownames(matrix_readcount_use) <- paste(
    rownames(matrix_readcount_use),
    gene_symbols[rownames(matrix_readcount_use)],
    sep = "_"
)

matrix_readcount_use[1:5, 1:5] |>
    as.matrix() |>
    knitr::kable()
BL5_AAACCTGCACTACAGT BL5_AAACCTGCAGTACACT BL5_AAACCTGGTTCACGGC BL5_AAACCTGGTTGCTCCT BL5_AAACCTGTCCAACCAA
ENSMUSG00000051951_Xkr4 0 0 0 0 0
ENSMUSG00000089699_Gm1992 0 0 0 0 0
ENSMUSG00000102343_Gm37381 0 0 0 0 0
ENSMUSG00000025900_Rp1 0 0 0 0 0
ENSMUSG00000109048_Rp1 0 0 0 0 0

Embedding

embedding <- readRDS(
    file = file.path(
        PROJECT_DIR, "data/10x/epicardial_10f_d7", "tsne_out_coords.rds"
    )
)
dim(embedding)
[1] 22140     6


Check memory usage.

purrr::walk(
    list(matrix_readcount_use, embedding), \(x) {
        print(object.size(x), units = "auto", standard = "SI")
    }
)
1.1 GB
2.8 MB

Clustering

x_column <- "x"
y_column <- "y"

GEOM_POINT_SIZE <- 0.5
EMBEDDING_TITLE_PREFIX <- "t-SNE"
RASTERISED <- FALSE

Embedding

embedding |>
    tibble::rownames_to_column(var = "cell") |>
    dplyr::mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_features = colSums(matrix_readcount_use[, cell] > 0),
    ) |>
    dplyr::rename(batch = batch.id) |>
    dplyr::group_by(batch) |>
    dplyr::summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_features = median(num_features)
    ) |>
    dplyr::mutate(
        group = dplyr::case_when(
            batch %in% c("BL5", "BL6") ~ "Primary",
            batch %in% c("BL7") ~ "Control",
            TRUE ~ "Reprogrammed"
        ),
        group = factor(
            group,
            levels = c("Primary", "Control", "Reprogrammed")
        )
    ) |>
    dplyr::arrange(group) |>
    dplyr::select(
        group, dplyr::everything()
    ) |>
    gt::gt() |>
    gt::data_color(
        columns = c(group),
        colors = scales::col_factor(
            palette = paletteer::paletteer_d(
                n = 3, palette = "colorblindr::OkabeIto"
            ) %>% as.character(),
            domain = NULL
        )
    ) |>
    gt::data_color(
        columns = c(median_umis),
        colors = scales::col_numeric(
            palette = c(
                "green", "orange", "red"
            ),
            domain = NULL
        )
    ) |>
    gt::summary_rows(
        columns = c(batch),
        fns = list(
            Count = ~ n()
        ),
        decimals = 0
    ) |>
    gt::summary_rows(
        columns = c(num_cells),
        fns = list(
            Sum = ~ sum(.)
        ),
        decimals = 0
    ) |>
    gt::summary_rows(
        columns = c(median_umis:median_features),
        fns = list(
            Mean = ~ mean(.)
        ),
        decimals = 2
    ) |>
    gt::tab_header(
        title = gt::md("**10x Genomics**; Batch")
    )
10x Genomics; Batch
group batch num_cells median_umis median_features
Primary BL5 5888 4831 1673.5
Primary BL6 6047 4521 1570.0
Control BL7 4914 13713 3571.0
Reprogrammed BL8 5291 10534 3043.0
Count 4
Sum 22,140
Mean 8,399.75 2,464.38
embedding <- embedding |>
    tibble::rownames_to_column(var = "cell") |>
    dplyr::mutate(
        group = dplyr::case_when(
            category %in% c("BL5", "BL6") ~ "Primary",
            category %in% c("BL7") ~ "Control",
            TRUE ~ "Reprogrammed"
        ),
        group = factor(
            group,
            levels = c("Primary", "Reprogrammed", "Control")
        )
    )

Cluster

p_embedding_cluster <- plot_embedding(
    data = embedding[, c(x_column, y_column)],
    color = embedding$cluster |> as.factor(),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Cluster"),
    color_labels = TRUE,
    color_legend = FALSE,
    sort_values = FALSE,
    rasterise = RASTERISED,
    geom_point_size = GEOM_POINT_SIZE
) +
    theme_customized_embedding()

p_embedding_UMI <- plot_embedding(
    data = embedding[, c(x_column, y_column)],
    color = log10(Matrix::colSums(matrix_readcount_use[, embedding$cell])),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
    color_legend = TRUE,
    sort_values = FALSE,
    shuffle_values = TRUE,
    rasterise = RASTERISED,
    geom_point_size = GEOM_POINT_SIZE
) +
    theme_customized_embedding()

p_embedding_MT <- plot_embedding(
    data = embedding[, c(x_column, y_column)],
    color = (colSums(matrix_readcount_use[
        stringr::str_detect(
            string = stringr::str_remove(
                string = rownames(matrix_readcount_use),
                pattern = "^E.+_"
            ),
            pattern = "mt-"
        ),
    ]) / colSums(matrix_readcount_use))[embedding$cell],
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; MT %"),
    color_legend = TRUE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    geom_point_size = GEOM_POINT_SIZE
) + theme_customized_embedding()

p_embedding_group <- plot_embedding(
    data = embedding[, c(x_column, y_column)],
    color = embedding$group |> as.factor(),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Group"),
    color_labels = FALSE,
    color_legend = TRUE,
    sort_values = FALSE,
    shuffle_values = TRUE,
    rasterise = RASTERISED,
    geom_point_size = GEOM_POINT_SIZE
) +
    theme_customized_embedding() +
    ggplot2::scale_color_manual(
        values = c(
            Primary = "#00AFBB",
            Reprogrammed = "#8BC34A",
            Control = "#E7B800"
        )
    )
embedding |>
    dplyr::mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_features = colSums(matrix_readcount_use[, cell] > 0),
    ) |>
    dplyr::group_by(cluster) |>
    dplyr::summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_features = median(num_features)
    ) |>
    gt::gt() |>
    gt::data_color(
        columns = c(median_umis),
        colors = scales::col_numeric(
            palette = c(
                "green", "orange", "red"
            ),
            domain = NULL
        )
    ) |>
    gt::summary_rows(
        columns = c(cluster),
        fns = list(
            Count = ~ n()
        ),
        decimals = 0
    ) |>
    gt::summary_rows(
        columns = c(num_cells),
        fns = list(
            Sum = ~ sum(.)
        ),
        decimals = 0
    ) |>
    gt::tab_header(
        title = gt::md("**10x Genomics**; Clustering")
    )
10x Genomics; Clustering
cluster num_cells median_umis median_features
1 3240 15464.0 3820.0
2 2259 2390.0 997.0
3 1914 12506.5 3352.5
4 1707 14631.0 3845.0
5 1330 5696.5 1564.5
6 1329 5189.0 1451.0
7 1222 6010.0 2147.0
8 1213 7804.0 2151.0
9 1093 5399.0 2239.0
10 1082 11149.5 3049.0
11 850 7452.5 2738.0
12 848 2341.0 773.0
13 638 3925.5 985.0
14 578 4939.5 2170.5
15 508 5815.0 1808.0
16 404 5027.5 1714.0
17 393 5241.0 2151.0
18 380 7976.5 2464.5
19 299 6724.0 2777.0
20 280 5360.5 1906.5
21 148 2981.5 1140.5
22 117 5305.0 1954.0
23 95 2586.0 1370.0
24 92 4966.5 280.0
25 64 6244.5 2619.5
26 57 4898.0 251.0
Count 26
Sum 22,140
purrr::reduce(
    list(
        p_embedding_cluster,
        p_embedding_UMI,
        p_embedding_MT,
        p_embedding_group
    ),
    `+`
) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )


Attaching package: 'formattable'
The following object is masked from 'package:patchwork':

    area
embedding |>
    dplyr::mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_features = colSums(matrix_readcount_use[, cell] > 0),
    ) |>
    dplyr::group_by(group) |>
    dplyr::summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_features = median(num_features)
    ) |>
    formattable::formattable(
        list(
            # num_cells = formattable::color_tile("transparent", "lightpink"),
            num_cells = formattable::color_bar("Lightpink"),
            median_umis = formattable::color_bar("lightgreen"),
            median_features = formattable::color_bar("lightblue")
        ),
        full_width = FALSE,
        caption = "10x Genomics; Group"
    )
10x Genomics; Group
group num_cells median_umis median_features
Primary 11935 4672 1620
Reprogrammed 5291 10534 3043
Control 4914 13713 3571
purrr::map(levels(embedding$group), \(x) {
    plot_embedding(
        data = embedding[, c(x_column, y_column)],
        color = as.integer(embedding$group == x) |> as.factor(),
        label = glue::glue(
            "{EMBEDDING_TITLE_PREFIX}; {x}: {sum(embedding$group == x)}"
        ),
        color_labels = FALSE,
        color_legend = FALSE,
        sort_values = FALSE,
        shuffle_values = TRUE,
        rasterise = RASTERISED,
        geom_point_size = GEOM_POINT_SIZE
    ) +
        theme_customized_embedding() +
        ggplot2::scale_color_manual(
            values = c("grey70", "salmon")
        )
}) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 3) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

Extract colors from the initial plots to keep colors consistent.

color_palette <- ggplot2::ggplot_build(p_embedding_cluster)$data[[1]] |>
    dplyr::select(color = colour, cluster = group) |>
    unique() |>
    dplyr::arrange(cluster)

color_palette <- setNames(
    object = color_palette$color,
    nm = color_palette$cluster
)

scales::show_col(color_palette)

Polished

p_embedding_cluster <- plot_embedding(
    data = embedding[, c(x_column, y_column)],
    color = embedding$cluster |> as.factor(),
    label = NULL,
    color_labels = TRUE,
    color_legend = FALSE,
    sort_values = FALSE,
    rasterise = RASTERISED,
    geom_point_size = GEOM_POINT_SIZE
) +
    theme_customized_embedding(void = TRUE)

p_embedding_cluster

cluster_labels <- embedding |>
    dplyr::group_by(cluster) |>
    dplyr::summarise(
        x = median(x),
        y = median(y)
    ) |>
    as.data.frame()
cluster_labels[25, c(2, 3)] <- c(-32.5, -8.5)

polygon_coordinates_cluster10 <- data.frame(
    x = c(
        -23, -26, -26, -24, -25.5, -27, -25, -24, -23, -20.5,
        -18.5, -18, -19.5, -18.5, -19, -18, -18, -17.5, -17, -15.5,
        -17, -16, -15, -14, -12, -11.5, -12, -10.5, -10.5, -9.5,
        -9, -7, -7, -6, -5, -5.5, -5.5, -5.5, -5, -5,
        -3.5, -6, -9, -9.5, -11.5, -13, -19
    ),
    y = c(
        -25, -21, -18, -15, -14.5, -11, -4, -1, 3, 0,
        3, 2.5, -7, -9, -12, -11, -12.5, -12, -11, -10.5,
        -9, -8, -8, -10, -9.5, -10.5, -14, -14.5, -13, -12.5,
        -11.5, -11, -12, -12, -12, -15, -18, -18, -18, -19,
        -20, -22, -21, -23, -25, -25.5, -25.5
    )
)

layers <- list(
    ggplot2::annotate(
        geom = "text",
        x = cluster_labels[, "x"],
        y = cluster_labels[, "y"], label = cluster_labels[, 1],
        parse = TRUE,
        size = 2,
        color = c("black")
    ),
    ggplot2::geom_polygon(
        data = polygon_coordinates_cluster10,
        ggplot2::aes(x, y), fill = NA, color = "black", size = .3
    ),
    ggplot2::annotate(
        geom = "path",
        x = -32.5 + 2.5 * cos(seq(0, 2 * pi, length.out = 100)),
        y = -17 + 32.5 / 16.5 * 2.5 * sin(seq(0, 2 * pi, length.out = 100)),
        color = "#00BFC4",
        size = .3
    )
)

p_embedding_reprogrammed <- plot_embedding(
    data = embedding[, c(x_column, y_column)],
    color = as.integer(embedding$group == "Reprogrammed") |> as.factor(),
    label = NULL,
    color_labels = TRUE,
    color_legend = FALSE,
    sort_values = TRUE,
    rasterise = RASTERISED,
    geom_point_size = GEOM_POINT_SIZE
) +
    theme_customized_embedding(void = TRUE) +
    ggplot2::scale_color_manual(
        values = c("grey70", "salmon")
    ) +
    layers +
    ggplot2::annotate(
        geom = "text",
        x = -82.06687,
        y = 76.43042,
        label = "10F\nreprogrammed\nMEFs",
        family = "Arial",
        color = "#FF5722",
        size = 2.5,
        vjust = "inward", hjust = "inward"
    ) +
    ggplot2::annotate(
        geom = "text",
        family = "Arial",
        x = -72,
        y = -67,
        label = "Primary\nepicardial cells",
        color = "#00BFC4",
        size = 2,
        vjust = "inward", hjust = "inward"
    ) +
    ggplot2::annotate(
        geom = "segment",
        x = -42,
        xend = -35,
        y = -55,
        yend = -24,
        color = "#00BFC4", size = .2,
        arrow = ggplot2::arrow(
            length = ggplot2::unit(1, "mm"), ends = "last",
            type = "closed"
        )
    )

p_embedding_uninfected <- plot_embedding(
    data = embedding[, c(x_column, y_column)],
    color = as.integer(embedding$group == "Control") |> as.factor(),
    label = NULL,
    color_labels = TRUE,
    color_legend = FALSE,
    sort_values = TRUE,
    rasterise = RASTERISED,
    geom_point_size = GEOM_POINT_SIZE
) +
    theme_customized() +
    theme_customized_void() +
    ggplot2::scale_color_manual(
        values = c("grey70", "salmon")
    ) +
    layers +
    ggplot2::annotate(
        geom = "text",
        x = -82.06687,
        y = 76.43042,
        label = "Uninfected\nMEFs",
        family = "Arial",
        color = "#FF5722",
        size = 2.5,
        vjust = "inward", hjust = "inward"
    )
list(
    p_embedding_reprogrammed,
    p_embedding_uninfected
) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

Composition

calc_group_composition(
    data = embedding,
    x = "cluster",
    group = "group"
) |>
    dplyr::mutate(
        cluster = factor(
            cluster
        )
    ) |>
    plot_barplot(
        x = "cluster",
        y = "percentage",
        z = "group",
        legend_ncol = 1,
    ) +
    ggplot2::scale_fill_manual(
        values = c(
            Primary = "#00AFBB",
            Reprogrammed = "#8BC34A",
            Control = "#E7B800"
        )
    )

clusters_selected <- c(1, 3, 4, 6, 10, 16, 17, 22)

purrr::map(clusters_selected, \(x) {
    cells_1 <- embedding$cell[
        (embedding$cluster == x) & (embedding$category == "BL8")
    ]
    cells_2 <- embedding$cell[
        (embedding$cluster == x) & (embedding$category == "BL7")
    ]
    data.frame(
        cluster = x,
        ratio = length(cells_1) / length(cells_2)
    )
}) |>
    dplyr::bind_rows() |>
    dplyr::mutate(
        cluster = factor(
            cluster,
            levels = clusters_selected |> rev()
        )
    ) |>
    ggplot(
        aes(
            y = cluster,
            x = ratio,
            fill = cluster
        )
    ) +
    ggplot2::geom_bar(stat = "identity") +
    theme_customized_violin() +
    ggplot2::guides(fill = "none") +
    ggplot2::scale_y_discrete(
        name = "MEF-derived cluster"
    ) +
    ggplot2::scale_x_continuous(
        name = "Ratio\n(reprogrammed / uninfected MEF)",
        breaks = seq(1, 9, 2)
    ) +
    ggplot2::scale_fill_manual(
        values = c(
            rep("grey35", 3),
            "#FF5722",
            rep("grey35", 4)
        )
    ) +
    ggplot2::geom_vline(xintercept = 1, linetype = 2, size = .2)

Expression

Embedding

FEATURES_SELECTED <- c(
    "ENSMUSG00000009471_Myod1",
    "ENSMUSG00000026414_Tnnt2",
    "ENSMUSG00000016458_Wt1",
    "ENSMUSG00000025105_Bnc1",
    "ENSMUSG00000049382_Krt8",
    "ENSMUSG00000079018_Ly6c1",
    "ENSMUSG00000049436_Upk1b",
    "ENSMUSG00000021391_Cenpp"
)
purrr::map(FEATURES_SELECTED, \(x) {
    selected_feature <- x

    cat(selected_feature, "\n")
    values <- log10(
        calc_cpm(matrix_readcount_use[, embedding$cell])
        [selected_feature, ] + 1
    )

    p1 <- plot_embedding(
        data = embedding[, c(x_column, y_column)],
        color = values,
        label = paste(
            EMBEDDING_TITLE_PREFIX,
            selected_feature |> stringr::str_remove(pattern = "^E.+_"),
            sep = "; "
        ),
        color_legend = TRUE,
        sort_values = TRUE,
        rasterise = RASTERISED,
        geom_point_size = GEOM_POINT_SIZE * 1.25,
        na_value = "grey80"
    ) +
        theme_customized_embedding()

    return(p1)
}) |>
    # unlist(recursive = FALSE) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2, byrow = FALSE) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )
ENSMUSG00000009471_Myod1 
ENSMUSG00000026414_Tnnt2 
ENSMUSG00000016458_Wt1 
ENSMUSG00000025105_Bnc1 
ENSMUSG00000049382_Krt8 
ENSMUSG00000079018_Ly6c1 
ENSMUSG00000049436_Upk1b 
ENSMUSG00000021391_Cenpp 

Bar plot

barplot_helper <- function(cells, features, matrix_readcount) {
    purrr::map(names(cells), \(x) {
        calc_cpm(matrix_readcount)[
            features,
            colnames(matrix_readcount) %in% cells[[x]]
        ] |>
            Matrix::rowMeans() |>
            tibble::enframe(name = "feature") |>
            dplyr::mutate(group = x)
    }) |>
        dplyr::bind_rows()
}
cells_barplot <- list(
    embedding |>
        dplyr::filter(
            cluster == 25 & category %in% c("BL5", "BL6")
        ) |>
        dplyr::pull(cell),
    embedding |>
        dplyr::filter(
            cluster == 10 & category %in% c("BL8")
        ) |>
        dplyr::pull(cell),
    embedding |>
        dplyr::filter(
            cluster != 10 & category %in% c("BL8")
        ) |>
        dplyr::pull(cell),
    embedding |>
        dplyr::filter(
            category %in% ("BL7")
        ) |>
        dplyr::pull(cell)
)
names(cells_barplot) <- c(
    "Primary epi.", "Cluster10",
    "Other", "Uninfected MEF"
)

features_barplot <- c(
    "ENSMUSG00000021950_Anxa8",
    "ENSMUSG00000015627_Gata5",
    "ENSMUSG00000031517_Gpm6a",
    "ENSMUSG00000020911_Krt19",
    "ENSMUSG00000049436_Upk1b"
)

barplot_helper(cells_barplot, features_barplot, matrix_readcount_use) |>
    dplyr::mutate(
        feature = stringr::str_remove(string = feature, pattern = "^.+_")
    ) |>
    dplyr::mutate(
        value = log10(value + 1),
        group = factor(group, levels = names(cells_barplot))
    ) |>
    plot_barplot_simple(
        x = "group",
        y = "value",
        z = "feature",
        y_title = expression("Avg expr; log"[10] * " (CPM + 1)")
    ) +
    theme_customized_violin(
        strip_background_fill = "grey80",
        panel_border_color = "black",
        axis_text_x_angle = c(90, 1, 0.5)
    ) +
    ggplot2::scale_fill_manual(
        values = c(
            c(
                "#00BFC4",
                "#FF5722",
                "grey35",
                "grey35"
            )
        )
    )

Enrichment of exogenous factors

features_selected_10 <- c(
    "ENSMUSG00000026628_Atf3",
    "ENSMUSG00000016458_Wt1",
    "ENSMUSG00000025105_Bnc1",
    "ENSMUSG00000051910_Sox6",
    "ENSMUSG00000045680_Tcf21",
    "ENSMUSG00000038193_Hand2",
    "ENSMUSG00000031965_Tbx20",
    "ENSMUSG00000032419_Tbx18",
    "ENSMUSG00000005836_Gata6",
    "ENSMUSG00000036098_Myrf"
)
clusters_selected <- c(1, 3, 4, 6, 10, 16, 17, 22)

enriched_factors <- do.call(
    rbind.data.frame,
    lapply(clusters_selected, \(x) {
        cells_1 <- embedding$cell[
            embedding$cluster == x & embedding$category == "BL8"
        ]
        cells_2 <- embedding$cell[
            embedding$cluster != x & embedding$category == "BL8"
        ]
        cat(x, length(cells_1), length(cells_2), "\n")

        de_paired <- detect_de(
            cell_group_a = cells_1,
            cell_group_b = cells_2,
            matrix_readcount = matrix_readcount_use,
            matrix_cpm = calc_cpm(matrix_readcount_use),
            only_enrichment = TRUE
        ) |>
            dplyr::mutate(category = x) |>
            tibble::rownames_to_column(var = "feature") |>
            dplyr::filter(feature %in% features_selected_10)
    })
) |>
    dplyr::filter(category %in% clusters_selected) |>
    dplyr::mutate(
        category = factor(category,
            levels = clusters_selected
        ),
        symbol = stringr::str_remove(
            string = feature,
            pattern = "^.+_"
        )
    )
1 1473 3818 
3 932 4359 
4 826 4465 
6 687 4604 
10 974 4317 
16 178 5113 
17 87 5204 
22 61 5230 


Differential expression analysis of 10F in MEF-derived clusters, as compared with all other reprogrammed cells. Each dot represents a gene (colored by fold change and sized by p value).

`%+replace%` <- ggplot2::`%+replace%`

ggplot2::ggplot() +
    ggplot2::geom_abline(intercept = 0, slope = 1, linetype = 2) +
    ggplot2::geom_point(
        data = enriched_factors,
        ggplot2::aes(positive_frac_b,
            positive_frac_a,
            size = -log10(pval_adj),
            color = log2_effect
        ),
        alpha = .8,
        stroke = 0, shape = 16
    ) +
    ggplot2::facet_wrap(
        ~category,
        ncol = 2,
        labeller = ggplot2::labeller(
            category = setNames(
                object = paste("Cluster", clusters_selected),
                nm = clusters_selected
            )
        )
    ) +
    ggplot2::coord_fixed() +
    ggplot2::scale_color_viridis_c(
        name = expression(paste("Log"[2], " effect"))
    ) +
    ggplot2::scale_size_continuous(
        name = expression(paste("-log"[10], " (p-value)"))
    ) +
    ggplot2::guides(
        color = ggplot2::guide_colorbar(order = 1),
        size = ggplot2::guide_legend(order = 2)
    ) +
    ggplot2::scale_x_continuous(
        name = "Expr (%, other reprogrammed cells)",
        limits = c(0, 1), breaks = seq(0, 1, .2)
    ) +
    ggplot2::scale_y_continuous(
        name = "Expr (%, indicated cluster)",
        limits = c(0, 1), breaks = seq(0, 1, .2)
    ) +
    theme_customized_violin() +
    ggplot2::theme(
        legend.background = ggplot2::element_blank(),
        legend.margin = ggplot2::margin(
            t = 0, r = 0, b = 0, l = 0, unit = "mm"
        ),
        legend.key.size = ggplot2::unit(2.5, "mm"),
        legend.text = ggplot2::element_text(family = "Arial", size = 6),
        legend.title = ggplot2::element_text(family = "Arial", size = 6),
        legend.position = "right",
        # legend.box = "horizontal",
        legend.box = "vertical",
        legend.box.background = ggplot2::element_blank()
    ) +
    ggrepel::geom_text_repel(
        data = enriched_factors,
        ggplot2::aes(
            positive_frac_b,
            positive_frac_a,
            label = symbol
        ),
        #
        size = 5 / ggplot2::.pt,
        family = "Arial",
        box.padding = .2,
        point.padding = .2,
        nudge_y = .15,
        arrow = ggplot2::arrow(length = ggplot2::unit(.02, "npc")),
        segment.color = "grey35",
        color = "black"
    )


R session info

devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.1.2 (2021-11-01)
 os       macOS Monterey 12.2
 system   aarch64, darwin20.6.0
 ui       unknown
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Chicago
 date     2022-02-01
 pandoc   2.17.1.1 @ /opt/homebrew/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package       * version     date (UTC) lib source
 assertthat      0.2.1       2019-03-21 [1] CRAN (R 4.1.1)
 backports       1.4.1       2021-12-13 [1] CRAN (R 4.1.2)
 bit             4.0.4       2020-08-04 [1] CRAN (R 4.1.1)
 bit64           4.0.5       2020-08-30 [1] CRAN (R 4.1.1)
 brio            1.1.3       2021-11-30 [1] CRAN (R 4.1.2)
 broom           0.7.12      2022-01-28 [1] CRAN (R 4.1.2)
 cachem          1.0.6       2021-08-19 [1] CRAN (R 4.1.1)
 callr           3.7.0       2021-04-20 [1] CRAN (R 4.1.1)
 cellranger      1.1.0       2016-07-27 [1] CRAN (R 4.1.1)
 checkmate       2.0.0       2020-02-06 [1] CRAN (R 4.1.1)
 cli             3.1.1       2022-01-20 [1] CRAN (R 4.1.2)
 colorspace      2.0-2       2021-06-24 [1] CRAN (R 4.1.1)
 commonmark      1.7         2018-12-01 [1] CRAN (R 4.1.1)
 crayon          1.4.2       2021-10-29 [1] CRAN (R 4.1.1)
 data.table      1.14.2      2021-09-27 [1] CRAN (R 4.1.1)
 DBI             1.1.2       2021-12-20 [1] CRAN (R 4.1.2)
 dbplyr          2.1.1       2021-04-06 [1] CRAN (R 4.1.1)
 desc            1.4.0       2021-09-28 [1] CRAN (R 4.1.1)
 devtools        2.4.3.9000  2022-01-27 [1] Github (r-lib/devtools@41280ac)
 digest          0.6.29      2021-12-01 [1] CRAN (R 4.1.2)
 dplyr         * 1.0.7.9000  2022-01-30 [1] Github (tidyverse/dplyr@0d8fb82)
 dtplyr          1.2.1.9000  2022-01-27 [1] Github (tidyverse/dtplyr@8542cf1)
 ellipsis        0.3.2       2021-04-29 [1] CRAN (R 4.1.1)
 evaluate        0.14        2019-05-28 [1] CRAN (R 4.1.1)
 extrafont     * 0.17        2014-12-08 [1] CRAN (R 4.1.1)
 extrafontdb     1.0         2012-06-11 [1] CRAN (R 4.1.1)
 fansi           1.0.2       2022-01-14 [1] CRAN (R 4.1.2)
 farver          2.1.0       2021-02-28 [1] CRAN (R 4.1.1)
 fastmap         1.1.0       2021-01-25 [1] CRAN (R 4.1.1)
 forcats       * 0.5.1.9000  2022-01-27 [1] Github (tidyverse/forcats@b4dade0)
 formattable   * 0.2.1       2021-01-07 [1] CRAN (R 4.1.2)
 fs              1.5.2.9000  2022-01-27 [1] Github (r-lib/fs@6d1182f)
 gargle          1.2.0       2021-07-02 [1] CRAN (R 4.1.1)
 generics        0.1.1       2021-10-25 [1] CRAN (R 4.1.1)
 ggplot2       * 3.3.5.9000  2022-01-27 [1] Github (tidyverse/ggplot2@c89c265)
 ggrepel         0.9.1       2021-01-15 [1] CRAN (R 4.1.1)
 glue            1.6.1.9000  2022-01-27 [1] Github (tidyverse/glue@3da70df)
 googledrive     2.0.0       2021-07-08 [1] CRAN (R 4.1.1)
 googlesheets4   1.0.0       2021-07-21 [1] CRAN (R 4.1.1)
 gt              0.3.1       2021-08-07 [1] CRAN (R 4.1.2)
 gtable          0.3.0.9000  2022-01-27 [1] Github (r-lib/gtable@a0bd272)
 haven           2.4.3       2021-08-04 [1] CRAN (R 4.1.1)
 highr           0.9         2021-04-16 [1] CRAN (R 4.1.1)
 hms             1.1.1       2021-09-26 [1] CRAN (R 4.1.1)
 htmltools       0.5.2       2021-08-25 [1] CRAN (R 4.1.1)
 htmlwidgets     1.5.4       2021-09-08 [1] CRAN (R 4.1.1)
 httr            1.4.2       2020-07-20 [1] CRAN (R 4.1.1)
 jsonlite        1.7.3       2022-01-17 [1] CRAN (R 4.1.2)
 knitr           1.37        2021-12-16 [1] CRAN (R 4.1.2)
 labeling        0.4.2       2020-10-20 [1] CRAN (R 4.1.1)
 lattice         0.20-45     2021-09-22 [2] CRAN (R 4.1.2)
 lifecycle       1.0.1       2021-09-24 [1] CRAN (R 4.1.1)
 lubridate       1.8.0       2022-01-27 [1] Github (tidyverse/lubridate@566590f)
 magrittr        2.0.2       2022-01-26 [1] CRAN (R 4.1.2)
 Matrix        * 1.4-0       2021-12-08 [2] CRAN (R 4.1.2)
 memoise         2.0.1       2021-11-26 [1] CRAN (R 4.1.2)
 modelr          0.1.8.9000  2022-01-27 [1] Github (tidyverse/modelr@16168e0)
 munsell         0.5.0       2018-06-12 [1] CRAN (R 4.1.1)
 paletteer       1.4.0.9000  2022-02-01 [1] Github (EmilHvitfeldt/paletteer@c814e0d)
 patchwork     * 1.1.0.9000  2022-01-27 [1] Github (thomasp85/patchwork@79223d3)
 pillar          1.7.0       2022-02-01 [1] CRAN (R 4.1.2)
 pkgbuild        1.3.1       2021-12-20 [1] CRAN (R 4.1.2)
 pkgconfig       2.0.3       2019-09-22 [1] CRAN (R 4.1.1)
 pkgload         1.2.4       2021-11-30 [1] CRAN (R 4.1.2)
 prettyunits     1.1.1.9000  2022-01-27 [1] Github (r-lib/prettyunits@8706d89)
 prismatic       1.1.0       2021-10-17 [1] CRAN (R 4.1.2)
 processx        3.5.2       2021-04-30 [1] CRAN (R 4.1.1)
 ps              1.6.0       2021-02-28 [1] CRAN (R 4.1.1)
 purrr         * 0.3.4.9000  2022-01-27 [1] Github (tidyverse/purrr@5aca9df)
 R.cache         0.15.0      2021-04-30 [1] CRAN (R 4.1.1)
 R.methodsS3     1.8.1       2020-08-26 [1] CRAN (R 4.1.1)
 R.oo            1.24.0      2020-08-26 [1] CRAN (R 4.1.1)
 R.utils         2.11.0      2021-09-26 [1] CRAN (R 4.1.1)
 R6              2.5.1.9000  2022-01-27 [1] Github (r-lib/R6@1b05b89)
 ragg            1.2.1.9000  2022-01-27 [1] Github (r-lib/ragg@c68c666)
 Rcpp            1.0.8       2022-01-13 [1] CRAN (R 4.1.2)
 readr         * 2.1.1.9000  2022-01-30 [1] Github (tidyverse/readr@9610228)
 readxl          1.3.1.9000  2022-01-27 [1] Github (tidyverse/readxl@2ccb82c)
 rematch2        2.1.2       2020-05-01 [1] CRAN (R 4.1.1)
 remotes         2.4.2       2022-01-27 [1] Github (r-lib/remotes@9355549)
 reprex          2.0.1       2021-08-05 [1] CRAN (R 4.1.1)
 rlang           1.0.0.9000  2022-02-01 [1] Github (r-lib/rlang@47df808)
 rmarkdown       2.11.12     2022-01-27 [1] Github (rstudio/rmarkdown@b53a7ce)
 rprojroot       2.0.2       2020-11-15 [1] CRAN (R 4.1.1)
 rstudioapi      0.13.0-9000 2022-01-27 [1] Github (rstudio/rstudioapi@5d0f087)
 Rttf2pt1        1.3.9       2021-07-22 [1] CRAN (R 4.1.1)
 rvest           1.0.2       2021-10-16 [1] CRAN (R 4.1.1)
 sass            0.4.0       2021-05-12 [1] CRAN (R 4.1.1)
 scales          1.1.1.9000  2022-01-29 [1] Github (r-lib/scales@13b01f0)
 sessioninfo     1.2.2       2021-12-06 [1] CRAN (R 4.1.2)
 stringi         1.7.6       2021-11-29 [1] CRAN (R 4.1.2)
 stringr       * 1.4.0.9000  2022-01-27 [1] Github (tidyverse/stringr@85f6140)
 styler        * 1.6.2.9000  2022-01-27 [1] Github (r-lib/styler@9274aed)
 systemfonts     1.0.3       2021-10-13 [1] CRAN (R 4.1.2)
 testthat        3.1.2.9000  2022-01-27 [1] Github (r-lib/testthat@54b9db2)
 textshaping     0.3.6       2021-10-13 [1] CRAN (R 4.1.1)
 tibble        * 3.1.6.9001  2022-02-01 [1] Github (tidyverse/tibble@39da9e8)
 tidyr         * 1.1.4.9000  2022-01-27 [1] Github (tidyverse/tidyr@d46d22a)
 tidyselect      1.1.1       2021-04-30 [1] CRAN (R 4.1.1)
 tidyverse     * 1.3.1.9000  2022-01-27 [1] Github (tidyverse/tidyverse@6186fbf)
 tzdb            0.2.0       2021-10-27 [1] CRAN (R 4.1.1)
 usethis         2.1.5.9000  2022-01-27 [1] Github (r-lib/usethis@57b109a)
 utf8            1.2.2       2021-07-24 [1] CRAN (R 4.1.1)
 vctrs           0.3.8       2021-04-29 [1] CRAN (R 4.1.1)
 viridisLite     0.4.0       2021-04-13 [1] CRAN (R 4.1.1)
 vroom           1.5.7.9000  2022-01-30 [1] Github (r-lib/vroom@d91f2aa)
 withr           2.4.3       2021-11-30 [1] CRAN (R 4.1.2)
 xfun            0.29        2021-12-14 [1] CRAN (R 4.1.2)
 xml2            1.3.3       2021-11-30 [1] CRAN (R 4.1.2)
 yaml            2.2.2       2022-01-25 [1] CRAN (R 4.1.2)

 [1] /opt/homebrew/lib/R/4.1/site-library
 [2] /opt/homebrew/Cellar/r/4.1.2/lib/R/library

──────────────────────────────────────────────────────────────────────────────